library(sf)
library(leaflet)
library(terra)
library(sp)
library(rnaturalearth)
library(dplyr)
library(leaflegend)
library(htmltools)
library(ggplot2)
library(plotly)

Pelago dataset

The database used in this study contains 3468 observations of the 11 variables briefly described in Table 1.

dados = read.csv2("C:/Users/vieir/OneDrive/Ambiente de Trabalho/UMinho/Mestrado - Estatística para Ciência de Dados/2º Ano/Projeto em Estatística/Dados/dados_completos.csv",
                  header = T,sep=";")

Table 1: Presentation of the variables in the Pelago database

Variable Description Type
RAD Radials - acoustic survey areas (from 1 to 22) Categorical
cruzcod Anonymous vessel identification Categorical
Year Year of the campaign (2015 to 2022) Categorical
longdec Longitude coordinate (in °) Quantitative
latdec Latitude coordinate (in °) Quantitative
depth Depth, estimated, of abundance (in m) Quantitative
ANE.EA Biomass index1 of anchovy (in m²/nmi²) Quantitative
Day Day of observation (from 1 to 30) Date
Month Month of observation (3 to 5) Date
SST Sea surface temperature (in °C) Quantitative
CHL Chlorophyll concentration in seawater (in mg/m³) Quantitative
Presence 0: Absence of anchovy
1: Presence of anchovy2
Categorical

1 This index represents the Nautical Area-Scattering Coefficients (NASC) where nmi represents nautical miles.

2 The presence of anchovy is considered if the abundance value is strictly positive.

We have treated the data by assuming that the observations either have a strictly positive biomass index, indicating the presence of anchovy at that point, or do not have one, indicating its absence. It is important to note that not all variables are of interest in an exploratory analysis. In this case, the variables cruzcod, longdec, latdec, and Day are not of interest.

Presence/Absence

# Retirar Variáveis com pouco interesse
dados = dados[,-c(3,4,6,12)]

# Subset - apenas acima dos 39'
dados.sub <- dados[dados$latdec > 39,]
rownames(dados.sub) <- 1:nrow(dados.sub)

# Transformar as variaveis
dados.sub$long.utm = as.numeric(dados.sub$long.utm)
dados.sub$lat.utm = as.numeric(dados.sub$lat.utm)
dados.sub$year = as.factor(dados.sub$year)
dados.sub$RAD = as.factor(dados.sub$RAD)
dados.sub$Day = as.numeric(dados.sub$Day)
dados.sub$Month = as.factor(dados.sub$Month)
dados.sub$longdec = as.numeric(dados.sub$longdec)
dados.sub$latdec = as.numeric(dados.sub$latdec)
dados.sub$depth = as.numeric(dados.sub$depth)
dados.sub$ANE.EA = as.numeric(dados.sub$ANE.EA)
dados.sub$SST = as.numeric(dados.sub$SST)
dados.sub$CHL = as.numeric(dados.sub$CHL)
dados.sub$log_CHL = log(dados.sub$CHL)

# Retirar os níveis que não são usados
dados.sub$Month <- droplevels(dados.sub$Month)
dados.sub$RAD = droplevels(dados.sub$RAD)

# Ver presenças e ausências
dados.sub$presenca = ifelse(dados.sub$ANE.EA == 0, 0, 1)

# Tranformar em variavel categorica
dados.sub$presenca = as.factor(dados.sub$presenca)

Table 2: Anchovy presence and absence over different study years.

Year Presences Absences Observations Presence Proportion (%) Absence Proportion (%) Mean Abundance (m²/nmi²)
2015 12 378 390 3.08% 96.9% 702
2016 68 404 472 14.4% 85.6% 667
2017 23 442 465 4.95% 95.1% 597
2018 22 418 440 5% 95% 2482
2019 11 446 457 2.41% 97.6% 290
2020 55 345 400 13.8% 86.2% 877
2021 94 378 472 19.9% 80.1% 428
2022 280 92 372 75.3% 24.7% 317
Total 565 2903 3468 16.3% 83.7% 795

Table 2 presents the number of observations with the presence or absence of anchovy and the respective mean abundance (considering abundance as the value of the biomass index estimated by NASC, as mentioned in Table 3.1) in locations where anchovy were present.

Overall, 16.3% of the observations indicate the presence of anchovy. However, the proportion of presences has been above this value since 2020, with the proportion of presences being close to 75% in 2022.

Although the proportion of presences has increased post-2020, the mean abundance has been decreasing. This may indicate that the anchovy were found in smaller schools in 2021 or 2022, in contrast to 2018, where, although less distributed along the Portuguese coast, the anchovy were found in larger schools.

Sea Level Temperature and Chlorophyll

# Criar o gráfico para Temperatura (SST)
p1 <- ggplot(dados.sub, aes(x = presenca, y = SST, fill = presenca)) +
  geom_boxplot() +
  scale_fill_manual(values = c("red2", "green4"), labels = c("Absence", "Presence")) +
  labs(x = "Presence", y = "Sea Surface Temperature (°C)", fill = "Presence") +
  theme_minimal()

p1_interactive <- ggplotly(p1)

# Criar o gráfico para Clorofila (log(CHL))
p2 <- ggplot(dados.sub, aes(x = presenca, y = log(CHL), fill = presenca)) +
  geom_boxplot() +
  scale_fill_manual(values = c("red2", "green4"), labels = c("Absence", "Presence")) +
  labs(x = "Presence", y = "Chlorophyll Concentration (log)", fill = "Presence") +
  theme_minimal()

p2_interactive <- ggplotly(p2)

p1_interactive
p2_interactive

Mapping

# Carregue os dados de fronteira de Portugal
portugal <- ne_countries(country = "Portugal", scale = "large", returnclass = "sf")
espanha <- ne_countries(country = "Spain", scale = "large", returnclass = "sf")

# Assuma que 'dados' é seu data.frame original com as colunas 'long.utm' e 'lat.utm'
d <- data.frame(lon = dados.sub[,5], lat = dados.sub[,6])
coordinates(d) <- ~lon + lat
proj4string(d) <- CRS("+proj=utm +zone=29 +ellps=WGS84 +datum=WGS84 +units=m +no_defs +towgs84=0,0,0")
CRS.new <- CRS("+proj=longlat +datum=WGS84 +no_defs")
d.longlat <- spTransform(d, CRS.new)

# Combine as coordenadas transformadas com os dados originais
dados.longlat <- cbind(dados.sub, data.frame(coordinates(d.longlat)))
names(dados.longlat)[ncol(dados.longlat)-1] <- "longdec"
names(dados.longlat)[ncol(dados.longlat)] <- "latdec"

# Crie um objeto sf a partir do data.frame transformado
dados.sf <- st_as_sf(dados.longlat, coords = c("longdec", "latdec"), crs = 4326)

# Definir cores para presença e ausência
pal <- colorFactor(palette = c("red", "green"), domain = dados.sf$presenca)

# Define a função para escalar os tamanhos dos pontos
scale_size <- function(x, min_size = 3, max_size = 10) {
  scaled_size <- ((x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))) * (max_size - min_size) + min_size
  return(scaled_size)
}

# Crie o mapa com a visualização inicial em Portugal
leaflet(dados.sf) %>%
  addTiles(urlTemplate = "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png") %>%
  addCircleMarkers(data = dados.sf, 
                   radius = ~scale_size(ANE.EA)*1.5,  # Use a coluna escalada para o tamanho dos pontos
                   color = ~pal(presenca), 
                   fillColor = ~pal(presenca),
                   fillOpacity = 0.8,  # Define a opacidade dos pontos
                   stroke = FALSE,  # Remove os limites dos pontos
                   label = ~sprintf(
                     "Year: %s<br>Month: %s<br>SST: %.2f °C<br>CHL: %.2f mg/m³<br>Abundance: %.2f m²/mn²", 
                     year, Month, SST, CHL, ANE.EA
                   ) %>% lapply(htmltools::HTML)) %>%
  addLegend("bottomright", 
            pal = pal, 
            values = dados.sf$presenca, 
            title = "Presença", 
            labels = c("Ausência", "Presença"),
            opacity = 1) %>%
  setView(lng = -8, lat = 39.5, zoom = 7)  # Coordenadas para centralizar em Portugal

The 2022 case.

# Crie o mapa com a visualização inicial em Portugal
leaflet(dados.sf) %>%
  addTiles(urlTemplate = "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png") %>%
  addCircleMarkers(data = dados.sf[dados.sf$year=="2022",], 
                   radius = ~scale_size(ANE.EA)*1.5,  # Use a coluna escalada para o tamanho dos pontos
                   color = ~pal(presenca), 
                   fillColor = ~pal(presenca),
                   fillOpacity = 0.8,  # Define a opacidade dos pontos
                   stroke = FALSE,  # Remove os limites dos pontos
                   label = ~sprintf(
                     "SST: %.2f °C<br>CHL: %.2f mg/m³<br>Abundance: %.2f m²/mn²", 
                     SST, CHL, ANE.EA
                   ) %>% lapply(htmltools::HTML)) %>%
  addLegend("bottomright", 
            pal = pal, 
            values = dados.sf$presenca, 
            title = "Presença", 
            labels = c("Ausência", "Presença"),
            opacity = 1) %>%
  setView(lng = -8, lat = 39.5, zoom = 7)  # Coordenadas para centralizar em Portugal